'''
Created on 2013-6-13

@author: liushuai
'''
import time

badCharShift = {}
goodSuffixShift = {}

buildShiftTime = 0
count = 0

def BoyerMoore(text, pattern, debug = False):
	global badCharShift, goodSuffixShift, count, buildShiftTime
	start = time.time()
	badCharShift = getOffsetBybadChar(pattern, debug)
	goodSuffixShift = getOffsetBygoodSuffix(pattern, debug)
	end = time.time()
	buildShiftTime = end - start

	pLen = len(pattern)
	tLen = len(text)
	w = pLen - 1
	while True:
		if w > tLen:
			return False
		flag = False
		for i in range(pLen)[::-1]:
			tmp = w - ( pLen - i ) + 1
			if text[tmp] != pattern[i]:
				w += max(lookupBadChar(i, text[tmp]), lookupGoodSuffx(i + 1))
				count += 1
				# w += lookupBadChar(i, text[tmp])
				if debug:
					print text
					print ' ' * (w - pLen + 1) + pattern
				flag = True
				break
		if not flag:
			return w - pLen + 1

def getOffsetBybadChar(pattern, debug = False):
	shift = {}
	charTable = {}.fromkeys(pattern).keys()
	for i in range(len(pattern)):
		for badChar in charTable:
			if pattern[i] == badChar:
				continue
			for j in range(i)[::-1]:
				if pattern[j] == badChar:
					shift[(i, badChar)] = i - j
					break
	if debug:
		print shift
	return shift

def getOffsetBygoodSuffix(pattern, debug = False):
	shift = {}
	for indexOfPattern in range(1 ,len(pattern)):
		flag = False
		for i in range(indexOfPattern, len(pattern)):
			if i == indexOfPattern:
				tmp = pattern[0:len(pattern) - 1].rfind(pattern[i:])
				if tmp != -1 and tmp != indexOfPattern:
					shift[indexOfPattern] = i - tmp
					flag = True
					break
			else:
				if pattern.startswith(pattern[i:]):
					shift[indexOfPattern] = i
					flag = True
					break
		if not flag:
			shift[indexOfPattern] = -1
	if debug:
		print shift
	return shift
			
def lookupBadChar(indexOfPattern, badChar):
	global badCharShift
	return badCharShift[(indexOfPattern, badChar)] if (indexOfPattern, badChar) in badCharShift else indexOfPattern + 1

def lookupGoodSuffx(indexOfPattern):
	global goodSuffixShift
	return goodSuffixShift[indexOfPattern] if indexOfPattern in goodSuffixShift else 0


def main():
	text = 'bbc abcdab abcdabcdabde'
	pattern = 'abcdabd'
	offset = BoyerMoore(text, pattern, True)
	if not offset:
		print 'lost'
	else :
		print 'hit at : ', offset

if __name__ == '__main__':
	main()
